/* Move to the Coding folder on whichever machine this runs on. Both cd calls are
   wrapped in capture, so a path that does not exist is skipped without stopping the run. */
capture cd "/Users/ddgueorg/Dropbox/C-GAP/C-GAP Replication Files/Coding"
capture cd "C:\Users\ejm5\Dropbox\C-GAP\C-GAP Replication Files\Coding"

/* Start from an empty session and stop Stata from halting output at --more-- prompts. */
clear all
set more off

/* ---- Data composition ---- */
/* Data files are read from ../Data, relative to the current directory. */
cd "../Data"

/* Base dataset that every later merge is applied to. */
use "cgap_5.2.dta", clear


/*MERGE IN ALL DATA*/
/* Builds the working panel by merging the source files into the loaded data one at a time.
   Before each merge the _merge indicator from the previous step (or the one already stored
   in the loaded file) is dropped so the next merge can create its own. */
/* NOTE(review): every merge below is m:m, which matches observations by position within each
   key group. If the key uniquely identifies rows on both sides, 1:1 would be the safer
   choice -- confirm against the source files before changing anything. */
drop _merge
/* update replace: values from the using file overwrite master values where they differ. */
merge m:m province year using "Transparency Addition Sept 2012.dta", update replace force  
drop _merge
/* update without replace: only missing master values are filled from the using file. */
merge m:m province year using "Qinghua_fiscal_simple.dta", update   
drop _merge
sort province year
merge m:m province year using "IPv4 2009-2012.dta"
/* Per-provincecode minimum of ipv4, computed before rows without a provincecode are removed. */
by provincecode, sort: egen min_ipv4=min(ipv4)
drop if provincecode==.
drop _merge
/* From here on the merge key is provincecode-year, and the panel is declared before each merge. */
xtset provincecode year
merge m:m provincecode year using "All HHI.dta"
drop _merge
xtset provincecode year
merge m:m provincecode year using "Website Transparancy.dta", update force
drop _merge
xtset provincecode year
/* The _merge created here is not dropped in this section; it is dropped later under TIME SERIES. */
merge m:m provincecode year using "budget_trans.dta", update force
drop if province=="Xinjiang Military"

/*LABEL ALTERNATIVE INDICES*/
/* Attach descriptive labels to the five externally sourced transparency indices, then draw
   the lower-triangle scatterplot matrix of those indices. graph matrix uses the variable
   labels as panel titles, so label text appears in the figure. */
/* NOTE(review): the acronym for the financial transparency source is spelled "CPSS" here,
   "CPPS" in the Figure 3.5 title and "CPPSS" in the Figure 4.2 title -- confirm which is right. */
lab var ogi "Shenzhen Website Transparency Index (2004-2011)"
lab var q_fiscal_trans "Qinghua Fiscal Transparency Data (2010-2011)"
lab var togiwaper "Peking University OGI Watch Index (2009-2011)"
lab var togipitiscore "Pollution Information Transparency Index (2008-2009)"
lab var togifinscore "CPSS Financial Transparency (2009-2011)"
graph matrix togiwaper togipitiscore  togifinscore q_fiscal_trans ogi, half

/*RECODING*/
set more off

/* Internet users as a share of total population. */
gen internet_penetration = cpopnettot/ cpoptot

/* Scale cfinexptot by 10 for provincecode 10 in 2002 only.
   NOTE(review): presumably a units fix for that single observation -- confirm. */
replace cfinexptot = cfinexptot*10 if year==2002 & provincecode==10

/* tdepmisnum relative to cfinexptot, scaled by one million. */
gen missuse_expend = (tdepmisnum /cfinexptot)*1000000

/* Log of (tdepmisnum per million of cpopadulttot, plus one). */
gen ln_missuse_pop = ln(((tdepmisnum /cpopadulttot)*1000000)+1)

/*Corrections*/
/* Hand corrections to missuse_expend: one value is overwritten and two observations are set to missing. */
/* NOTE(review): year==200 looks like a truncated four-digit year. As written the condition can
   only match an observation whose year is literally 200, so this correction probably never
   applies. The intended year cannot be recovered from this file -- confirm and fix. */
replace missuse_expend=21.2 if province=="Ningxia" & year==200
replace missuse_expend=. if province=="Hainan" & year==2008
replace missuse_expend=. if province=="Tianjin" & year==2001

/*RECODING*/
/* Log of the corrected misuse ratio. */
gen ln_missuse_expend = ln(missuse_expend)

/* tdepmiscournum per million of cpopadulttot. */
gen cases_cap = (tdepmiscournum/ cpopadulttot)*1000000

/* cpopcoltot as a share of cpopadulttot. */
gen college_per = cpopcoltot/ cpopadulttot

/* Log controls. */
gen ln_gdp = ln(cecongrpnum)
gen ln_pop = ln(cpopadulttot)

/* Years remaining until 2011. */
gen time = 2011-year

/* Yearly means: first the power-list dummy alone, then together with two further variables. */
tabstat tplistprovdum, by(year) statistics(mean)
tabstat tplistprovdum tdisctransprovdum tdepmiscournum, by(year) statistics(mean)

/* Row total across the nine acorr* count variables. */
egen acorr_total = rowtotal( ///
    acorrdiscopnum acorrdiscclnum acorrdiscpunlocnum acorrdiscpuncounum ///
    acorrdiscpunprovnum acorrdiscrepprovnum acorrproccorrnum acorrprocderenum acorrproccdnum)

/*CODES*/
/* Indicator equal to 1 for provincecode 1, 2, 9 or 22 and 0 for every other observation. */
gen national_city = inlist(provincecode, 1, 2, 9, 22)

/* Log of tdeplandnum. */
gen ln_landcases = ln(tdeplandnum)

/*TIME SERIES*/
/* Declare the provincecode-year panel, then remove the merge indicator that is still in the data. */
xtset provincecode year
drop _merge

/*SOE DATA*/
/* Merge in soe.dta. The _merge variable created here is left in the dataset. */
merge m:m provincecode year using "soe.dta"

/* stateowned100millionyuan multiplied out by 100,000,000, as a percentage of cecongrpnum. */
gen soe_gdp = (stateowned100millionyuan*100000000)*100/cecongrpnum

/* Declare the panel again after the merge. */
xtset provincecode year

/*INTERNET CODES*/
/* no_ipv4 = ipv4 * (year-specific multiplier) * 1000 for 2005 through 2011. Years outside
   that range stay missing.
   NOTE(review): the source of the yearly multipliers is not documented in this file -- confirm. */
gen no_ipv4 = .
local ipv4_years   2005 2006 2007  2008  2009  2010  2011
local ipv4_factors 5802 9802 13527 18127 23245 27764 33044
forvalues k = 1/7 {
    local yr  : word `k' of `ipv4_years'
    local fac : word `k' of `ipv4_factors'
    replace no_ipv4 = ipv4*`fac'*1000 if year==`yr'
}

/* no_ipv4 relative to the number of internet users. */
gen ipv4_share = no_ipv4/cpopnettot

/* Year-on-year change in cpopnettot minus the year-on-year change in no_ipv4.
   The d. operator relies on the panel declared with xtset. */
gen ipv6 = d.cpopnettot-d.no_ipv4

/* Signed log: ln(ipv6) for positive values, minus ln(-ipv6) for negative values. */
gen ln_ipv6 = ln(ipv6)
replace ln_ipv6 = 0-(ln(0-ipv6)) if ipv6<0

/* Level difference between cpopnettot and no_ipv4, and that difference per head of cpoptot. */
gen no_ipv6 = cpopnettot-no_ipv4
gen ipv6_pop = no_ipv6/cpoptot

/*CORRECTIONS*/
/* Hand-entered power-list count for Guangdong in 2011, then rebuild the power-list dummy
   from the count: 1 when the count is positive and not system-missing, 0 when it is zero. */
replace tplistprovnum = 59 if province=="Guangdong" & year==2011
replace tplistprovdum = 1 if tplistprovnum>0 & tplistprovnum!=.
replace tplistprovdum = 0 if tplistprovnum==0

/* Hand-entered tselecnum values for three specific observation codes. */
replace tselecnum = 26  if code==192003
replace tselecnum = 100 if code==192008
replace tselecnum = 15  if code==192010

/* Office count per observation and its maximum within each province, with hand-entered
   maxima for four provinces. */
gen offices = cgovdeptnum+ cgovenfbnum
bysort province: egen max_offices = max(offices)
replace max_offices = 75 if province=="Sichuan"
replace max_offices = 63 if inlist(province, "Tibet", "Xinjiang")
replace max_offices = 67 if province=="Qinghai"

/* Power lists as a percentage of the province's maximum office count. */
gen powerlists_per = (tplistprovnum /max_offices)*100

/* Copies of the province identifiers that are filled only for provincecode 19 and 22
   (missing or empty elsewhere). */
gen provincecode2 = provincecode if inlist(provincecode, 19, 22)
gen province2 = province if inlist(provincecode, 19, 22)

/* Year variable that is filled only from 2000 onward. */
gen y2000 = year if year>=2000

/* Remove the stored transcore so it can be rebuilt from the corrected dummies. */
drop transcore


/*GENERATE NEW VARIABLES*/
/* From this point commands end with a semicolon, so long variable lists can wrap over lines. */
#delimit ;

/* Row total of the fifteen transparency dummies. */
egen transcore = rowtotal(
    tplistprovdum
    tdisctransprovdum tdisctransrevdum
    tdiscpubsprovdum tdiscpubsrevdum
    tdisccitmprovdum tdisccitmrevdum
    tfeeprovdum tfeeupdatdum tfeedocdum tfeestanddum
    tproccatadum tprocnotidum tprocstanddum
    tselecdum);

/* Row total of the four fee dummies. */
egen trans_fees = rowtotal(tfeeprovdum tfeeupdatdum tfeedocdum tfeestanddum);

/* Row mean of the three discretionary-standard percentage variables. */
egen discretionary = rowmean(tdisctransprovper tdiscpubsprovper tdisccommprovper);

/* Summary statistics for the transparency dummies, counts and percentages. */
summarize
    tplistprovdum tplistprovnum
    tdisctransprovdum tdisctransrevdum tdiscpubsprovdum tdiscpubsrevdum
    tdisccitmprovdum tdisccitmrevdum
    tfeeprovdum tfeeupdatdum tfeedocdum tfeestanddum
    tproccatadum tprocnotidum tprocstanddum
    tctrprovnum tselecdum tselecnum
    tplistcitnum tplistcitper
    tdisctransprovnum tdisctransprovper
    tdiscpubsprovnum tdiscpubsprovper
    tdisccommprovnum tdisccommprovper
    tproccitnum tproccitper
    tctrcitnum tctrcitper
    tseleccitnum tseleccitper;

/* Turn pausing on, plot tdepmisnum against year for each province, and stop so the plot
   can be inspected before the run continues. */
pause on;
twoway (scatter tdepmisnum year), by(province);
pause;

/* All tables and figures from here on are written relative to ../Results/. */
cd "../Results/";

/************************************ TABLES ************************************/

/*Table 3.1*/
/* Yearly medians of the misuse ratio and its two components, then yearly means of the ratio. */
tabstat missuse_expend tdepmisnum cfinexptot, statistics(median) by(year);
tabstat missuse_expend, statistics(mean) by(year);

/*Table 3.2*/
/* Principal-component factor analysis of the transparency indicators, retaining three factors.
   The loadings are then varimax-rotated and regression-scored factor values are stored.
   Pausing was switched on earlier in the script, so each pause below halts the run until
   the user resumes it. */
#delimit;
factor tplistprovdum powerlists_per tdisctransprovdum tdisctransrevdum tdiscpubsprovdum tdiscpubsrevdum tdisccitmprovdum tdisccitmrevdum 
tfeeprovdum tfeeupdatdum tfeedocdum tfeestanddum tproccatadum tprocnotidum tprocstanddum  tselecdum tselecnum   
tplistcitper  tdisctransprovper  tdiscpubsprovper  tdisccommprovper  tproccitper   tseleccitper budget_prov budget_city,   factors(3)  pcf;
pause;
rotate, varimax;
pause;
/* predict assigns the new names to the retained factors in order, so f_process, f_structure
   and f_outputs receive the scores of the first, second and third factor respectively. */
predict f_process f_structure f_outputs, reg;

#delimit;

/* NOTE(review): the labels below call f_structure "Factor 3" and f_outputs "Factor 2", while
   the predict name order above makes f_structure the second factor and f_outputs the third.
   Confirm whether the labels or the name order reflect the intended mapping. */
label variable f_process "Transparency Factor 1: Processes";

label variable f_structure "Transparency Factor 3: Structure";

label variable f_outputs "Transparency Factor 2: Outputs";

/* Short form of the province name.
   NOTE(review): Stata documents the length argument of abbrev() as ranging from 5 to 32 --
   confirm what a length of 1 actually produces. */
generate abbrev=abbrev(province,1);



/*Table 3.3*/
#delimit ;

/* Pairwise correlations between the factor scores, the component indices and the
   externally sourced transparency indices. */
pwcorr
    f_process f_structure f_outputs
    discretionary powerlists_per trans_fees tselecnum budget_prov
    togiwaper togipitiscore togifinscore q_fiscal_trans ogi,
    star(5);

/* Pairwise correlations of the misuse measures with lagged factor scores and lagged
   internet measures. The lag operators need the panel to be declared first. */
xtset provincecode year;
pwcorr
    miss ln_missuse_pop
    l.f_process l.f_structure l.f_outputs
    tdeplandnum utcpermany
    l2.internet l2.ipv4_share,
    star(5);


/*Table 3.4*/
/* Nine panel-corrected standard error models of the misuse ratio, appended column by column
   to the trans_corrupt output file. Commands are newline-terminated in this section. */
#delimit cr
xtset provincecode year

/* Pieces shared by every model. */
local pcse_opts correlation(psar1) rhotype(dw)
local out_opts  e(rmse) bdec(3) sdec(3)
local factors   l.f_process l.f_structure l.f_outputs
local controls  tdepmistot l.ln_gdp l.ln_pop l.college_per i.year i.provincecode

/* Models 1-3: one lagged factor score at a time. The first model starts a fresh output file. */
local first replace
foreach f of local factors {
    xtpcse miss `f', `pcse_opts'
    outreg2 using trans_corrupt, `out_opts' `first'
    local first
}

/* Model 4: all three lagged factor scores together. */
xtpcse miss `factors', `pcse_opts'
outreg2 using trans_corrupt, `out_opts'

/* Model 5: the three factor scores plus controls, year dummies and province dummies. */
xtpcse miss `factors' `controls', `pcse_opts'
outreg2 using trans_corrupt, `out_opts'

/* Models 6-8: one lagged component index at a time, with the same controls. */
foreach x in discretionary trans_fees budget_prov {
    xtpcse miss l.`x' `controls', `pcse_opts'
    outreg2 using trans_corrupt, `out_opts'
}

/* Model 9: lagged power-list share with controls. This last call also requests the Excel output. */
xtpcse miss l.powerlists_per `controls', `pcse_opts'
outreg2 using trans_corrupt, `out_opts' excel



/****************** Fill in Missing Data for Presentation Purposes ******************/

/*Chapter 3 Graphics*/
/* Where one of the plotted variables is missing, take the value from the previous year of
   the same province (lag operator on the declared panel). This runs after the Table 3.4
   models, so the filled values are used only by the code that follows. */
#delimit cr
xtset provincecode year

local fill_vars powerlists_per miss f_process f_structure f_outputs ///
    discretionary transcore trans_fees budget_prov
foreach v of local fill_vars {
    replace `v' = l.`v' if `v'==.
}

/* Hand back to semicolon-terminated commands, which is the state the figure code expects. */
#delimit ;


/*********************************************************************************/
/*Figure 3.1*/
/* One panel per province: lowess fit and observed values of the misuse ratio by year,
   restricted to observations where the ratio is at most 40. */
#delimit ;
twoway
    (lowess miss year if miss<=40, lcolor(black))
    (scatter miss year if miss<=40, mcolor(gs8)),
    by(province)
    ytitle("Misused Funds/Expenditures", size(medium))
    legend(size(small) rows(1) label(1 Fitted Values) label(2 Observed Values))
    xtitle("")
    ylab(0(20)40)
    scheme(s1mono)
    note("");

graph export "fig3_1_misuse.pdf", as(pdf) replace;

/*********************************************************************************/
/*Figure 3.2*/
/* One panel per province: linear fit and observed values of the power-list share from
   2002 onward. The run pauses after the export. */
#delimit ;
twoway
    (lfit powerlists_per year if year>=2002, mcolor(black))
    (scatter powerlists_per year if year>=2002, mcolor(gs4) msymbol(square)),
    by(province)
    ytitle("Share of Provincial Offices with Power Lists", size(medium))
    legend(size(small) rows(1) label(1 Fitted Values) label(2 Observed Values))
    xtitle("")
    ylab(0(75)150, labsize(small))
    scheme(s1mono);

graph export "fig3_2_power.pdf", as(pdf) replace;
pause;

/**************************************************************************************************************************/
/*Figure 3.3*/
/* Four-panel figure built from a yearly collapse of the data. Everything between preserve
   and restore works on a temporary copy, and the full panel is restored afterwards. */
#delimit;
preserve;
/* Recompute the misuse ratio from its raw components. Inside this preserve block that
   overwrites the hand corrections made earlier (and, presumably, the forward-filled values
   written through the abbreviation miss -- confirm). */
replace missuse_expend= (tdepmisnum /cfinexptot)*1000000;

/* Collapse to one row per year: the dummies (and budget_prov, tdeplandnum) are summed across
   provinces, and the remaining variables are averaged. Only 2000-2010 is kept. */
collapse (sum) tplistprovdum tplistprovnum tdisctransprovdum tdisctransrevdum tdiscpubsprovdum 
tdiscpubsrevdum tdisccitmprovdum tdisccitmrevdum tfeeprovdum tfeeupdatdum tfeedocdum 
tfeestanddum tproccatadum tprocnotidum tprocstanddum tselecdum tdeplandnum budget_prov (mean) tctrcitper cases_cap ln_missuse_expend missuse_expend acorrproccorrnum 
trans_fees budget_city  , by(year);
drop if year<2000;
drop if year>2010;


/* Panel "Documentation": yearly sums of four documentation dummies. */
#delimit;
twoway (line  tplistprovdum year, lwidth(thick)) (line  tfeeprovdum year, lwidth(thick) lpattern(longdash)) 
(line  tprocstanddum year, lwidth(thick) lpattern(shortdash)) (line  tproccatadum year, lwidth(thick) lpattern(dot)) , 
xlab(2000(2)2010) ylab(0(4)32) xtitle("") ytitle("Number of Provinces", size(medlarge) margin(medsmall)) 
legend(rows(4) size(tiny) label(1 Power Lists) label (2 Service Fees) label(3 Procurement Standards) 
label(4 Procurement Catalog) ring(0) position(5)) title("Documentation", size(large)) scheme(s1color);

graph save trans_docs.gph, replace;

/* Panel "Discretionary Standards": yearly sums of the six discretionary-standard dummies,
   with the revised series drawn dashed in the same colour as its base series. */
#delimit;
twoway (line  tdisctransprovdum year, lcolor(black) lwidth(thick)) (line  tdisctransrevdum year, lcolor(black) lwidth(medthick) lpattern(dash)) 
(line tdiscpubsprovdum  year, lcolor(gs12) lwidth(thick)) (line  tdiscpubsrevdum year, lcolor(gs12) lwidth(medthick) lpattern(dash))
(line tdisccitmprovdum   year, lcolor(gs8) lwidth(thick)) (line tdisccitmrevdum year, lcolor(gs8) lwidth(medthick) lpattern(dash)), 
xlab(2000(2)2010) ylab(0(4)32) xtitle("") ytitle("Number of Provinces", size(medlarge) margin(medsmall)) 
legend(rows(6) size(tiny) label(1 Transportation ) label (2 Transportation Revised ) label(3 Public Security) 
label(4 Public Security Revised) label(5 Commerce) label(6 Commerce Revised)
ring(0) position(11)) title("Discretionary Standards", size(large)) scheme(s1color);

graph save trans_standards.gph, replace;


/* Panel "Selection of Civil Servants": yearly sum of tselecdum. */
/* NOTE(review): the legend labels in this panel and the next one name the four series of the
   Documentation panel, although only one series is plotted here. They look like copy-paste
   leftovers -- confirm whether a legend is displayed at all and clean up if so. */
#delimit;
twoway (line   tselecdum year, lwidth(thick) lcolor(gs4)), 
xlab(2000(2)2010) ylab(0(4)32) xtitle("") ytitle("Number of Provinces", size(medlarge) margin(medsmall)) 
legend(rows(4) size(vsmall) label(1 Power Lists) label (2 Service Fees) label(3 Procurement Standards) 
label(4 Procurement Catalog) ring(0) position(5)) title("Selection of Civil Servants", size(large)) scheme(s1color);

graph save trans_select.gph, replace;


/* Panel "Provincial Budget Available": yearly sum of budget_prov. */
#delimit;
twoway (line budget_prov year, lwidth(thick) lcolor(gs8)), 
xlab(2000(2)2010) ylab(0(4)32) xtitle("") ytitle("Number of Provinces", size(medlarge) margin(medsmall)) 
legend(rows(4) size(vsmall) label(1 Power Lists) label (2 Service Fees) label(3 Procurement Standards) 
label(4 Procurement Catalog) ring(0) position(5)) title("Provincial Budget Available", size(large)) scheme(s1color);

graph save trans_budget.gph, replace;

/* Combine the four saved panels on common axes in a 2x2 grid and export the figure. */
#delimit;
graph combine trans_docs.gph trans_budget.gph trans_standards.gph trans_select.gph, xcommon ycommon rows(2) cols(2) scheme(s1mono);

graph export "fig3_3_trans.pdf", as(pdf) replace;
restore;





/*********************************************************************************/

/*Figure 3.4*/
/* Yearly means of the three factor scores, plotted as labelled markers joined by lines.
   The collapse happens on a temporary copy and the panel is restored afterwards. */
#delimit ;
preserve;

collapse (mean)
    miss f_structure f_outputs f_process
    powerlists_per transcore trans_fees tdeplandnum
    internet_pen ipv6_pop,
    by(year);

/* Display formats for the collapsed means. missuse_expend is the full name of the
   variable passed to collapse as miss. */
format powerlists_per missuse_expend internet %9.3g;
format f_structure f_outputs f_process ipv6_pop %9.2g;
replace tdeplandnum=tdeplandnum/1000;
format tdeplandnum trans_fees %9.2g;

/* Plots 1, 3 and 5 are the labelled markers. Plots 2, 4 and 6 are the connecting lines,
   whose legend entries are blanked. */
twoway
    (scatter f_process year if year>=2000,
        mcolor(black) msize(medium) msymbol(diamond)
        mlab(f_process) mlabsize(tiny) mlabcolor(black) mlabposition(10))
    (line f_process year if year>=2000, lcolor(black) lwidth(medthin))
    (scatter f_outputs year if year>=2000,
        mcolor(gs8) msize(medium) msymbol(square)
        mlab(f_outputs) mlabsize(tiny) mlabcolor(black) mlabposition(2))
    (line f_outputs year if year>=2000, lcolor(gs8) lwidth(medthin) lpattern(dash))
    (scatter f_structure year if year>=2000,
        mcolor(gs12) msize(medium) msymbol(triangle)
        mlab(f_structure) mlabsize(tiny) mlabcolor(black) mlabposition(12))
    (line f_structure year if year>=2000, lcolor(gs12) lwidth(medthin) lpattern(shortdash)),
    legend(position(11) ring(0) size(vsmall) rows(3)
        label(1 "Factor 1: Processes") label(3 "Factor 2: Outputs") label(5 "Factor 3: Structure")
        label(2 "") label(4 "") label(6 ""))
    xtitle("")
    ytitle("Transformed Transparency Score", size(medium) margin(medium))
    xlabel(2000(1)2011, labsize(small))
    scheme(s1mono);

graph export "fig3_4_trans.pdf", as(pdf) replace;
restore;

/*********************************************************************************/

/*Figure 3.5*/
/* Two fitted-line panels of the misuse ratio (capped at 40) against a transparency measure,
   saved separately and then combined side by side. */
#delimit ;

/* Right-hand panel: 2010 cross-section against togifinscore, keeping scores below 500. */
twoway
    (lfitci miss togifinscore if year==2010 & togifinscore<500 & miss<=40)
    (scatter miss togifinscore if year==2010 & togifinscore<500 & miss<=40,
        mlabel(province) mlabsize(vsmall) mlabcolor(black)
        mcolor(navy) msize(vsmall) msymbol(diamond) mlabposition(9)),
    legend(position(7) ring(0) size(vsmall) rows(3)
        label(1 "95% CI") label(2 "Fitted Values") label(3 "Observed Values"))
    xtitle("Financial Transparency Index (2010)")
    ytitle("Misused Funds/Expenditures (%)", size(medium) margin(medium))
    xlabel(, labsize(small))
    title("CPPS Data in 2010")
    legend(size(vsmall))
    scheme(s1mono);
graph save xs3.gph, replace;

/* Left-hand panel: all years against f_structure, with the 2010 observations drawn again
   as labelled squares on top. */
twoway
    (lfitci miss f_structure if miss<=40)
    (scatter miss f_structure if miss<=40, mcolor(gs12) msize(vsmall) msymbol(diamond))
    (scatter miss f_structure if year==2010 & miss<=40,
        mlab(province) mlabsize(vsmall) mlabcolor(black)
        mcolor(gs4) msize(small) msymbol(square)),
    legend(position(1) ring(0) size(vsmall) rows(3)
        label(1 "95% CI") label(2 "Fitted Values") label(3 "Observed Values"))
    xtitle()
    ytitle("Misused Funds/Expenditures (%)", size(medium) margin(medium))
    xlabel(, labsize(small))
    legend(rows(2) label(4 2010 Score) size(vsmall))
    title("CGAP Data Multiple Years")
    scheme(s1mono);
graph save xs1.gph, replace;

graph combine xs1.gph xs3.gph, imargin(0 0);
graph export "fig3_5_fit.pdf", as(pdf) replace;


/*********************************************************************************/

/*Figure 4.2*/
/*Case Selection*/
#delimit ;

/* Normalise the spelling of one province name used as a marker label. */
replace province="Inner Mongolia" if province=="InnerMongolia";

/* 2009 pairwise correlations of the misuse ratio with the internal and external
   transparency measures. The run pauses afterwards. */
pwcorr
    miss ogi f_process f_structure f_outputs powerlists_per
    togiwprovper togiwaper togiwavol togiwavolfee togiwavolbudget togiwasupsuitaccept
    togipitiscore togipitivioscore togipiticampscore togipitiinfoscore
    togifinscore togifinper togifinofficenum togifinfdeptnum togifinexpscore
    togifinbudgscore togifintpayscore togifinrevscore togisocsecscore togisoescore
    if year==2009,
    star(10);
pause;

/* Panel B: 2010 regression on togifinscore (scores below 325) with lagged controls,
   followed by its added-variable plot. */
xtset provincecode year;
regress miss togifinscore l.ln_gdp l.ln_pop l.college_per
    if year==2010 & togifinscore<325, vce(robust);
avplot togifinscore,
    mlabel(province) mlabsize(vsmall) mlabcolor(black)
    msymbol(diamond) mcolor(navy) msize(vsmall) mlabposition(6)
    xtitle("Transparency|X", size(medium) margin(medium))
    ytitle("Misused Funds/Expenditures(%)|X", size(medium) margin(medium))
    xlabel(, labsize(small)) ylab(, labsize(small))
    title("B. CPPSS Financial Transparency", size(medlarge))
    scheme(s1mono);
graph save fig7_1b.gph, replace;

/* Panel A: the same 2010 specification with ogi as the transparency measure. */
xtset provincecode year;
regress miss ogi l.ln_gdp l.ln_pop l.college_per
    if year==2010, vce(robust);
avplot ogi,
    mlabel(province) mlabsize(vsmall) mlabcolor(black)
    msymbol(diamond) mcolor(navy) msize(vsmall) mlabposition(6)
    xtitle("Transparency|X", size(medium) margin(medium))
    ytitle("Misused Funds/Expenditures(%)|X", size(medium) margin(medium))
    xlabel(, labsize(small)) ylab(, labsize(small))
    title("A. Shenzhen Website Transparency", size(medlarge))
    scheme(s1mono);
graph save fig7_2b.gph, replace;

/* Panel A on the left, panel B on the right. */
graph combine fig7_2b.gph fig7_1b.gph, imargin(vsmall) scheme(s1mono);
graph export "fig4_2_case.pdf", as(pdf) replace;




/*********************************************************************************/

/*Figure 4.3*/
/* Guangdong (solid) against Chongqing (dashed), 2005-2011: budget_city on the left,
   power-list share on the right. */
#delimit ;

twoway
    (tsline budget_city if province=="Guangdong" & year<2012 & year>2004, lwidth(thick))
    (tsline budget_city if province=="Chongqing" & year<2012 & year>2004, lwidth(thick) lpattern(dash)),
    legend(size(small) rows(3) position(11) ring(0) label(1 Guangdong) label(2 Chongqing))
    xtitle("")
    xlabel(2005(1)2011, labsize(small))
    ytitle("Number of Cities in Province with Budget Posted", size(medium) margin(medium))
    scheme(s1mono);
graph save Chap4_3a.gph, replace;

/* Hand-entered Chongqing values for the power-list share. These overwrite the working
   data and are not undone afterwards. */
xtset provincecode year;
replace powerlists_per=77 if year==2011 & province=="Chongqing";
replace powerlists_per=93.75 if inlist(year, 2005, 2006) & province=="Chongqing";

twoway
    (tsline powerlists_per if province=="Guangdong" & year<2012 & year>2004, lwidth(thick))
    (tsline powerlists_per if province=="Chongqing" & year<2012 & year>2004, lwidth(thick) lpattern(dash)),
    legend(off)
    xtitle("")
    xlabel(2005(1)2011, labsize(small))
    ytitle("Power Lists/Departments (%)", size(medium) margin(medium))
    scheme(s1mono);
graph save Chap4_3b.gph, replace;

graph combine Chap4_3a.gph Chap4_3b.gph, common imargin(small) scheme(s1mono);
graph export "fig4_3_case.pdf", as(pdf) replace;


/*********************************************************************************/

/*Figure 4.4*/
/* Lowess fit and observed values of the misuse ratio from 2000 onward for the two case
   provinces (provincecode2 19 and 22), one panel each, with reference lines at 2003 and 2007. */
/* The province test is wrapped in inlist() so that the miss<=40 cap applies to both
   provinces. Written as  a & b | c  the cap would bind only to the first province,
   because & is evaluated before |. */
#delimit ;
twoway
    (lowess miss y2000 if miss<=40 & inlist(provincecode2, 19, 22), lcolor(black))
    (scatter miss y2000 if miss<=40 & inlist(provincecode2, 19, 22), mcolor(gs4)),
    by(province2)
    ytitle("Misused Funds/Expenditures", size(medium))
    legend(size(small) rows(1) label(1 Fitted Values) label(2 Observed Values))
    xtitle("")
    ylab(0(20)40)
    xlab(2000(2)2011)
    xline(2003 2007, lpattern(dash) lcolor(gs12) lwidth(thin))
    scheme(s1mono);

graph export "fig4_4_misuse.pdf", as(pdf) replace;
pause;


